{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8f40cb8f-9104-48ce-8ee6-fffa4671045c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "import pandas as pd\n",
    "from scipy.stats import mannwhitneyu\n",
    "from statsmodels.sandbox.stats.multicomp import multipletests\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "\n",
    "sns.set_style('whitegrid')\n",
    "\n",
    "def p_adjust(pvalues, method='fdr_bh'):\n",
    "    res = multipletests(pvalues, method=method)\n",
    "    return np.array(res[1], dtype=float)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f3af6be6-d931-4378-9330-a064cbc0fb22",
   "metadata": {},
   "source": [
    "# Correlate nasal 16S with vaccine response\n",
    "\n",
    "##### Michael Shaffer\n",
    "##### 7/21/22\n",
    "##### Merck ESC, Sys bio group\n",
    "\n",
    "To look for associations between the nasal microbiome and vaccine response we have calculated correlations between the abundances of individual OTUs and the continuous titer measurements from 1 year of life."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1d18d0e-aa22-4f85-be4e-ed4c6f4a2516",
   "metadata": {},
   "source": [
    "## Read in data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "663ff2b9-d055-41a8-80c7-70179b7c7872",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SubmissionType</th>\n",
       "      <th>SampleNumber</th>\n",
       "      <th>SampleIDValidation</th>\n",
       "      <th>DiversigenCheckInSampleName</th>\n",
       "      <th>ReplacesLowVolumeSampleID</th>\n",
       "      <th>BoxLocation</th>\n",
       "      <th>SampleType</th>\n",
       "      <th>SampleSource</th>\n",
       "      <th>SequencingType</th>\n",
       "      <th>BabyN</th>\n",
       "      <th>...</th>\n",
       "      <th>median_mmNorm_PCV</th>\n",
       "      <th>median_mmNorm_DTAPHib</th>\n",
       "      <th>protectNorm_Dip</th>\n",
       "      <th>protectNorm_TET</th>\n",
       "      <th>protectNorm_PRP (Hib)</th>\n",
       "      <th>protectNorm_PT</th>\n",
       "      <th>protectNorm_PRN</th>\n",
       "      <th>protectNorm_FHA</th>\n",
       "      <th>geommean_protectNorm</th>\n",
       "      <th>VR_group_v2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SampleID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>106_V5_NS_A1</th>\n",
       "      <td>Primary in Tube</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Box 1, A3</td>\n",
       "      <td>Nasal Swab</td>\n",
       "      <td>Human Infant</td>\n",
       "      <td>16S</td>\n",
       "      <td>106</td>\n",
       "      <td>...</td>\n",
       "      <td>0.061955</td>\n",
       "      <td>0.052874</td>\n",
       "      <td>2.1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.600000</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>1.3750</td>\n",
       "      <td>1.140388</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107_V2_NS_A1</th>\n",
       "      <td>Primary in Tube</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Box 1, A4</td>\n",
       "      <td>Nasal Swab</td>\n",
       "      <td>Human Infant</td>\n",
       "      <td>16S</td>\n",
       "      <td>107</td>\n",
       "      <td>...</td>\n",
       "      <td>0.958142</td>\n",
       "      <td>0.114018</td>\n",
       "      <td>4.4</td>\n",
       "      <td>5.2</td>\n",
       "      <td>10.666667</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>1.1250</td>\n",
       "      <td>0.3750</td>\n",
       "      <td>1.783418</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107_V3_NS_A1</th>\n",
       "      <td>Primary in Tube</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107_V8_NS_A1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Box 1, A5</td>\n",
       "      <td>Nasal Swab</td>\n",
       "      <td>Human Infant</td>\n",
       "      <td>16S</td>\n",
       "      <td>107</td>\n",
       "      <td>...</td>\n",
       "      <td>0.958142</td>\n",
       "      <td>0.114018</td>\n",
       "      <td>4.4</td>\n",
       "      <td>5.2</td>\n",
       "      <td>10.666667</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>1.1250</td>\n",
       "      <td>0.3750</td>\n",
       "      <td>1.783418</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107_V5_NS_A1</th>\n",
       "      <td>Primary in Tube</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Box 1, A8</td>\n",
       "      <td>Nasal Swab</td>\n",
       "      <td>Human Infant</td>\n",
       "      <td>16S</td>\n",
       "      <td>107</td>\n",
       "      <td>...</td>\n",
       "      <td>0.958142</td>\n",
       "      <td>0.114018</td>\n",
       "      <td>4.4</td>\n",
       "      <td>5.2</td>\n",
       "      <td>10.666667</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>1.1250</td>\n",
       "      <td>0.3750</td>\n",
       "      <td>1.783418</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108_V4_NS_A1</th>\n",
       "      <td>Primary in Tube</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Box 1, A9</td>\n",
       "      <td>Nasal Swab</td>\n",
       "      <td>Human Infant</td>\n",
       "      <td>16S</td>\n",
       "      <td>108</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003102</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>1.800000</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>0.1875</td>\n",
       "      <td>0.449420</td>\n",
       "      <td>LVR</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               SubmissionType  SampleNumber  SampleIDValidation  \\\n",
       "SampleID                                                          \n",
       "106_V5_NS_A1  Primary in Tube             2                 NaN   \n",
       "107_V2_NS_A1  Primary in Tube             3                 NaN   \n",
       "107_V3_NS_A1  Primary in Tube             4                 NaN   \n",
       "107_V5_NS_A1  Primary in Tube             5                 NaN   \n",
       "108_V4_NS_A1  Primary in Tube             6                 NaN   \n",
       "\n",
       "             DiversigenCheckInSampleName ReplacesLowVolumeSampleID  \\\n",
       "SampleID                                                             \n",
       "106_V5_NS_A1                         NaN                       NaN   \n",
       "107_V2_NS_A1                         NaN                       NaN   \n",
       "107_V3_NS_A1                107_V8_NS_A1                       NaN   \n",
       "107_V5_NS_A1                         NaN                       NaN   \n",
       "108_V4_NS_A1                         NaN                       NaN   \n",
       "\n",
       "             BoxLocation  SampleType  SampleSource SequencingType  BabyN  ...  \\\n",
       "SampleID                                                                  ...   \n",
       "106_V5_NS_A1   Box 1, A3  Nasal Swab  Human Infant            16S    106  ...   \n",
       "107_V2_NS_A1   Box 1, A4  Nasal Swab  Human Infant            16S    107  ...   \n",
       "107_V3_NS_A1   Box 1, A5  Nasal Swab  Human Infant            16S    107  ...   \n",
       "107_V5_NS_A1   Box 1, A8  Nasal Swab  Human Infant            16S    107  ...   \n",
       "108_V4_NS_A1   Box 1, A9  Nasal Swab  Human Infant            16S    108  ...   \n",
       "\n",
       "              median_mmNorm_PCV median_mmNorm_DTAPHib  protectNorm_Dip  \\\n",
       "SampleID                                                                 \n",
       "106_V5_NS_A1           0.061955              0.052874              2.1   \n",
       "107_V2_NS_A1           0.958142              0.114018              4.4   \n",
       "107_V3_NS_A1           0.958142              0.114018              4.4   \n",
       "107_V5_NS_A1           0.958142              0.114018              4.4   \n",
       "108_V4_NS_A1           0.003102              0.000000              0.5   \n",
       "\n",
       "             protectNorm_TET  protectNorm_PRP (Hib) protectNorm_PT  \\\n",
       "SampleID                                                             \n",
       "106_V5_NS_A1             3.0               2.600000         0.3125   \n",
       "107_V2_NS_A1             5.2              10.666667         0.3125   \n",
       "107_V3_NS_A1             5.2              10.666667         0.3125   \n",
       "107_V5_NS_A1             5.2              10.666667         0.3125   \n",
       "108_V4_NS_A1             0.5               1.800000         0.3125   \n",
       "\n",
       "              protectNorm_PRN protectNorm_FHA geommean_protectNorm  \\\n",
       "SampleID                                                             \n",
       "106_V5_NS_A1           0.3125          1.3750             1.140388   \n",
       "107_V2_NS_A1           1.1250          0.3750             1.783418   \n",
       "107_V3_NS_A1           1.1250          0.3750             1.783418   \n",
       "107_V5_NS_A1           1.1250          0.3750             1.783418   \n",
       "108_V4_NS_A1           0.3125          0.1875             0.449420   \n",
       "\n",
       "              VR_group_v2  \n",
       "SampleID                   \n",
       "106_V5_NS_A1          NVR  \n",
       "107_V2_NS_A1          NVR  \n",
       "107_V3_NS_A1          NVR  \n",
       "107_V5_NS_A1          NVR  \n",
       "108_V4_NS_A1          LVR  \n",
       "\n",
       "[5 rows x 84 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta = pd.read_csv('../../data/metadata/nasal/nasal_metadata.csv', index_col='SampleID')\n",
    "meta['age_at_collection'] = (pd.to_datetime(meta['CollectionDate']) - pd.to_datetime(meta['DOB'])).dt.days\n",
    "meta = pd.concat([meta,\n",
    "                  pd.read_csv('../../data/metadata/nasal/nasal_abx_usage.csv', index_col='SampleID'),\n",
    "                  pd.read_csv('../../data/metadata/nasal/nasal_titers_yr1.csv', index_col='SampleID')],\n",
    "                 axis=1)\n",
    "meta = meta.loc[~pd.isna(meta['median_mmNorm'])]\n",
    "meta.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "07db70cc-f930-477f-ab3f-27777f8af4f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>101_S1_NS_A1</th>\n",
       "      <th>101_V3_NS_A1</th>\n",
       "      <th>101_V5_NS_A1</th>\n",
       "      <th>102_V1_NS_A1</th>\n",
       "      <th>102_V3_NS_A1</th>\n",
       "      <th>102_V5_NS_A1</th>\n",
       "      <th>102_V6_NS_A1</th>\n",
       "      <th>103_S1_NS_A1</th>\n",
       "      <th>103_S3_NS_A1</th>\n",
       "      <th>103_V10_NS_A1</th>\n",
       "      <th>...</th>\n",
       "      <th>MSA2002_5A</th>\n",
       "      <th>MSA2002_5B</th>\n",
       "      <th>MSA2002_6A</th>\n",
       "      <th>MSA2002_6B</th>\n",
       "      <th>MSA2002_7A</th>\n",
       "      <th>MSA2002_7B</th>\n",
       "      <th>MSA2002_8A</th>\n",
       "      <th>MSA2002_8B</th>\n",
       "      <th>MSA2002_9A</th>\n",
       "      <th>MSA2002_9B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Otu0001</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1593</td>\n",
       "      <td>7320</td>\n",
       "      <td>606</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Otu0002</th>\n",
       "      <td>5845</td>\n",
       "      <td>9876</td>\n",
       "      <td>692</td>\n",
       "      <td>557</td>\n",
       "      <td>783</td>\n",
       "      <td>509</td>\n",
       "      <td>6047</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>114</td>\n",
       "      <td>126</td>\n",
       "      <td>104</td>\n",
       "      <td>115</td>\n",
       "      <td>119</td>\n",
       "      <td>111</td>\n",
       "      <td>168</td>\n",
       "      <td>147</td>\n",
       "      <td>83</td>\n",
       "      <td>103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Otu0003</th>\n",
       "      <td>117</td>\n",
       "      <td>0</td>\n",
       "      <td>879</td>\n",
       "      <td>4392</td>\n",
       "      <td>1428</td>\n",
       "      <td>528</td>\n",
       "      <td>87</td>\n",
       "      <td>877</td>\n",
       "      <td>2642</td>\n",
       "      <td>1498</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Otu0004</th>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>1104</td>\n",
       "      <td>1</td>\n",
       "      <td>6133</td>\n",
       "      <td>475</td>\n",
       "      <td>1</td>\n",
       "      <td>109</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Otu0005</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4173</td>\n",
       "      <td>24</td>\n",
       "      <td>3121</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 980 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         101_S1_NS_A1  101_V3_NS_A1  101_V5_NS_A1  102_V1_NS_A1  102_V3_NS_A1  \\\n",
       "Otu0001             1             0             0             1             2   \n",
       "Otu0002          5845          9876           692           557           783   \n",
       "Otu0003           117             0           879          4392          1428   \n",
       "Otu0004             9             1          1104             1          6133   \n",
       "Otu0005             0             0             0             0             0   \n",
       "\n",
       "         102_V5_NS_A1  102_V6_NS_A1  103_S1_NS_A1  103_S3_NS_A1  \\\n",
       "Otu0001             0             2          1593          7320   \n",
       "Otu0002           509          6047             1             0   \n",
       "Otu0003           528            87           877          2642   \n",
       "Otu0004           475             1           109             2   \n",
       "Otu0005             1             0          4173            24   \n",
       "\n",
       "         103_V10_NS_A1  ...  MSA2002_5A  MSA2002_5B  MSA2002_6A  MSA2002_6B  \\\n",
       "Otu0001            606  ...           3           4           1           1   \n",
       "Otu0002              3  ...         114         126         104         115   \n",
       "Otu0003           1498  ...           0           0           0           0   \n",
       "Otu0004             14  ...           0           0           1           0   \n",
       "Otu0005           3121  ...           0           0           0           0   \n",
       "\n",
       "         MSA2002_7A  MSA2002_7B  MSA2002_8A  MSA2002_8B  MSA2002_9A  \\\n",
       "Otu0001           0           2           1           1           1   \n",
       "Otu0002         119         111         168         147          83   \n",
       "Otu0003           0           0           0           0           0   \n",
       "Otu0004           0           0           0           0           0   \n",
       "Otu0005           0           0           0           0           0   \n",
       "\n",
       "         MSA2002_9B  \n",
       "Otu0001           0  \n",
       "Otu0002         103  \n",
       "Otu0003           0  \n",
       "Otu0004           0  \n",
       "Otu0005           0  \n",
       "\n",
       "[5 rows x 980 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts = pd.read_csv('../../data/nasal/otu_table.gt10_rar10K.tsv', sep='\\t', index_col=0).transpose()\n",
    "counts.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "be1270b2-e08a-4060-a5e3-3008288626f4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(775, 84)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/6t/1w2t3qmd1rx81mfw9sq_tfpr0000gn/T/ipykernel_11819/52385233.py:2: FutureWarning: Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.\n",
      "  meta = meta.loc[in_both].sort_values(['BabyN', 'age_at_collection'])\n"
     ]
    }
   ],
   "source": [
    "in_both = set(meta.index) & set(counts.columns)\n",
    "meta = meta.loc[in_both].sort_values(['BabyN', 'age_at_collection'])\n",
    "print(meta.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "19ccfe3f-9562-46f7-8adf-91892eadfe82",
   "metadata": {},
   "outputs": [],
   "source": [
    "meta_v5 = meta.query(\"VisitCode == 'V5'\")\n",
    "counts_v5 = counts[meta_v5.index]\n",
    "counts_v5 = counts_v5.loc[(counts_v5 > 0).sum(axis=1) > counts_v5.shape[1]*.2]\n",
    "\n",
    "meta_v6 = meta.query(\"VisitCode == 'V6'\")\n",
    "counts_v6 = counts[meta_v6.index]\n",
    "counts_v6 = counts_v6.loc[(counts_v6 > 0).sum(axis=1) > counts_v6.shape[1]*.2]\n",
    "\n",
    "meta_v7 = meta.query(\"VisitCode == 'V7'\")\n",
    "counts_v7 = counts[meta_v7.index]\n",
    "counts_v7 = counts_v7.loc[(counts_v7 > 0).sum(axis=1) > counts_v7.shape[1]*.2]\n",
    "\n",
    "meta_v9 = meta.query(\"VisitCode == 'V9'\")\n",
    "counts_v9 = counts[meta_v9.index]\n",
    "counts_v9 = counts_v9.loc[(counts_v9 > 0).sum(axis=1) > counts_v9.shape[1]*.2]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "03b6ca75-7f7f-4ffb-a575-5d399ab746e5",
   "metadata": {},
   "source": [
    "## Correlations with median titer values\n",
    "\n",
    "We will use Spearman's R as our correlation metric and use OTU abundances from the 2 month (V5), 4 month (V6), 6 month (V7) and 1 year (V9) time points. 2, 4 and 6 months are when vaccinations are given and 1 year is when titers were measured."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "07c9fcf2-97a8-47a9-833d-e3eee803110d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>OTU</th>\n",
       "      <th>LVR_mean</th>\n",
       "      <th>NVR_mean</th>\n",
       "      <th>statistic</th>\n",
       "      <th>p_value</th>\n",
       "      <th>p_adj</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Otu0029</td>\n",
       "      <td>2.500</td>\n",
       "      <td>35.719298</td>\n",
       "      <td>170.0</td>\n",
       "      <td>0.211242</td>\n",
       "      <td>0.911735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Otu0009</td>\n",
       "      <td>59.750</td>\n",
       "      <td>192.385965</td>\n",
       "      <td>165.0</td>\n",
       "      <td>0.211974</td>\n",
       "      <td>0.911735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Otu0025</td>\n",
       "      <td>3.250</td>\n",
       "      <td>30.175439</td>\n",
       "      <td>175.5</td>\n",
       "      <td>0.282830</td>\n",
       "      <td>0.911735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Otu0023</td>\n",
       "      <td>0.125</td>\n",
       "      <td>10.947368</td>\n",
       "      <td>186.0</td>\n",
       "      <td>0.283525</td>\n",
       "      <td>0.911735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Otu0015</td>\n",
       "      <td>32.000</td>\n",
       "      <td>53.333333</td>\n",
       "      <td>175.5</td>\n",
       "      <td>0.294685</td>\n",
       "      <td>0.911735</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        OTU  LVR_mean    NVR_mean  statistic   p_value     p_adj\n",
       "19  Otu0029     2.500   35.719298      170.0  0.211242  0.911735\n",
       "8   Otu0009    59.750  192.385965      165.0  0.211974  0.911735\n",
       "17  Otu0025     3.250   30.175439      175.5  0.282830  0.911735\n",
       "16  Otu0023     0.125   10.947368      186.0  0.283525  0.911735\n",
       "11  Otu0015    32.000   53.333333      175.5  0.294685  0.911735"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts_stats_v5_rows = list()\n",
    "for otu, row in counts_v5.iterrows():\n",
    "    lvr_abunds = row[meta_v5.query('VR_group == \"LVR\"').index]\n",
    "    nvr_abunds = row[meta_v5.query('VR_group == \"NVR\"').index]\n",
    "    # check for not all zeros\n",
    "    # lvr_gt_20 = (lvr_abunds != 0).sum()/len(lvr_abunds) > .2\n",
    "    # nvr_gt_20 = (nvr_abunds != 0).sum()/len(nvr_abunds) > .2\n",
    "    lvr_gt_20 = lvr_abunds.sum()/len(lvr_abunds) > 10\n",
    "    nvr_gt_20 = nvr_abunds.sum()/len(nvr_abunds) > 10\n",
    "    if lvr_gt_20 or nvr_gt_20:\n",
    "        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds)\n",
    "        counts_stats_v5_rows.append([otu, lvr_abunds.mean(), nvr_abunds.mean(), stat, p_value])\n",
    "counts_stats_v5 = pd.DataFrame(counts_stats_v5_rows, columns=['OTU', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value']).sort_values('p_value')\n",
    "counts_stats_v5['p_adj'] = p_adjust(counts_stats_v5['p_value'])\n",
    "counts_stats_v5.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5d4719b9-ee9c-4fae-9806-cf6641bd571c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>OTU</th>\n",
       "      <th>LVR_mean</th>\n",
       "      <th>NVR_mean</th>\n",
       "      <th>statistic</th>\n",
       "      <th>p_value</th>\n",
       "      <th>p_adj</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Otu0027</td>\n",
       "      <td>53.545455</td>\n",
       "      <td>13.711538</td>\n",
       "      <td>371.0</td>\n",
       "      <td>0.083314</td>\n",
       "      <td>0.933915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Otu0004</td>\n",
       "      <td>2494.545455</td>\n",
       "      <td>1272.884615</td>\n",
       "      <td>375.5</td>\n",
       "      <td>0.106360</td>\n",
       "      <td>0.933915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Otu0039</td>\n",
       "      <td>14.727273</td>\n",
       "      <td>17.826923</td>\n",
       "      <td>231.0</td>\n",
       "      <td>0.307787</td>\n",
       "      <td>0.933915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Otu0028</td>\n",
       "      <td>16.454545</td>\n",
       "      <td>11.076923</td>\n",
       "      <td>332.0</td>\n",
       "      <td>0.332889</td>\n",
       "      <td>0.933915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Otu0021</td>\n",
       "      <td>34.909091</td>\n",
       "      <td>49.923077</td>\n",
       "      <td>234.5</td>\n",
       "      <td>0.354753</td>\n",
       "      <td>0.933915</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        OTU     LVR_mean     NVR_mean  statistic   p_value     p_adj\n",
       "18  Otu0027    53.545455    13.711538      371.0  0.083314  0.933915\n",
       "3   Otu0004  2494.545455  1272.884615      375.5  0.106360  0.933915\n",
       "25  Otu0039    14.727273    17.826923      231.0  0.307787  0.933915\n",
       "19  Otu0028    16.454545    11.076923      332.0  0.332889  0.933915\n",
       "14  Otu0021    34.909091    49.923077      234.5  0.354753  0.933915"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts_stats_v6_rows = list()\n",
    "for otu, row in counts_v6.iterrows():\n",
    "    lvr_abunds = row[meta_v6.query('VR_group == \"LVR\"').index]\n",
    "    nvr_abunds = row[meta_v6.query('VR_group == \"NVR\"').index]\n",
    "    # check for not all zeros\n",
    "    # lvr_gt_20 = (lvr_abunds != 0).sum()/len(lvr_abunds) > .2\n",
    "    # nvr_gt_20 = (nvr_abunds != 0).sum()/len(nvr_abunds) > .2\n",
    "    lvr_gt_20 = lvr_abunds.sum()/len(lvr_abunds) > 10\n",
    "    nvr_gt_20 = nvr_abunds.sum()/len(nvr_abunds) > 10\n",
    "    if lvr_gt_20 or nvr_gt_20:\n",
    "        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds)\n",
    "        counts_stats_v6_rows.append([otu, lvr_abunds.mean(), nvr_abunds.mean(), stat, p_value])\n",
    "counts_stats_v6 = pd.DataFrame(counts_stats_v6_rows, columns=['OTU', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value']).sort_values('p_value')\n",
    "counts_stats_v6['p_adj'] = p_adjust(counts_stats_v6['p_value'])\n",
    "counts_stats_v6.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "dab2c6b9-30ad-4be3-b858-2114a8308d00",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>OTU</th>\n",
       "      <th>LVR_mean</th>\n",
       "      <th>NVR_mean</th>\n",
       "      <th>statistic</th>\n",
       "      <th>p_value</th>\n",
       "      <th>p_adj</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Otu0004</td>\n",
       "      <td>2081.181818</td>\n",
       "      <td>602.018519</td>\n",
       "      <td>426.5</td>\n",
       "      <td>0.023450</td>\n",
       "      <td>0.609704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Otu0025</td>\n",
       "      <td>26.636364</td>\n",
       "      <td>19.611111</td>\n",
       "      <td>392.0</td>\n",
       "      <td>0.091636</td>\n",
       "      <td>0.733972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Otu0034</td>\n",
       "      <td>14.909091</td>\n",
       "      <td>20.277778</td>\n",
       "      <td>389.0</td>\n",
       "      <td>0.103835</td>\n",
       "      <td>0.733972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Otu0005</td>\n",
       "      <td>1484.272727</td>\n",
       "      <td>1046.907407</td>\n",
       "      <td>371.5</td>\n",
       "      <td>0.112919</td>\n",
       "      <td>0.733972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Otu0016</td>\n",
       "      <td>27.636364</td>\n",
       "      <td>41.518519</td>\n",
       "      <td>378.0</td>\n",
       "      <td>0.150701</td>\n",
       "      <td>0.783644</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        OTU     LVR_mean     NVR_mean  statistic   p_value     p_adj\n",
       "3   Otu0004  2081.181818   602.018519      426.5  0.023450  0.609704\n",
       "17  Otu0025    26.636364    19.611111      392.0  0.091636  0.733972\n",
       "20  Otu0034    14.909091    20.277778      389.0  0.103835  0.733972\n",
       "4   Otu0005  1484.272727  1046.907407      371.5  0.112919  0.733972\n",
       "12  Otu0016    27.636364    41.518519      378.0  0.150701  0.783644"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts_stats_v7_rows = list()\n",
    "for otu, row in counts_v7.iterrows():\n",
    "    lvr_abunds = row[meta_v7.query('VR_group == \"LVR\"').index]\n",
    "    nvr_abunds = row[meta_v7.query('VR_group == \"NVR\"').index]\n",
    "    # check for not all zeros\n",
    "    # lvr_gt_20 = (lvr_abunds != 0).sum()/len(lvr_abunds) > .2\n",
    "    # nvr_gt_20 = (nvr_abunds != 0).sum()/len(nvr_abunds) > .2\n",
    "    lvr_gt_20 = lvr_abunds.sum()/len(lvr_abunds) > 10\n",
    "    nvr_gt_20 = nvr_abunds.sum()/len(nvr_abunds) > 10\n",
    "    if lvr_gt_20 or nvr_gt_20:\n",
    "        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds)\n",
    "        counts_stats_v7_rows.append([otu, lvr_abunds.mean(), nvr_abunds.mean(), stat, p_value])\n",
    "counts_stats_v7 = pd.DataFrame(counts_stats_v7_rows, columns=['OTU', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value']).sort_values('p_value')\n",
    "counts_stats_v7['p_adj'] = p_adjust(counts_stats_v7['p_value'])\n",
    "counts_stats_v7.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "42e16d24-3221-4620-ad29-da3021ed7d0c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>OTU</th>\n",
       "      <th>LVR_mean</th>\n",
       "      <th>NVR_mean</th>\n",
       "      <th>statistic</th>\n",
       "      <th>p_value</th>\n",
       "      <th>p_adj</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Otu0008</td>\n",
       "      <td>95.888889</td>\n",
       "      <td>40.932203</td>\n",
       "      <td>400.0</td>\n",
       "      <td>0.012107</td>\n",
       "      <td>0.472181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Otu0036</td>\n",
       "      <td>3.666667</td>\n",
       "      <td>29.661017</td>\n",
       "      <td>183.0</td>\n",
       "      <td>0.122226</td>\n",
       "      <td>0.932017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Otu0004</td>\n",
       "      <td>1217.444444</td>\n",
       "      <td>599.203390</td>\n",
       "      <td>349.0</td>\n",
       "      <td>0.131345</td>\n",
       "      <td>0.932017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>Otu0072</td>\n",
       "      <td>10.333333</td>\n",
       "      <td>1.457627</td>\n",
       "      <td>326.0</td>\n",
       "      <td>0.184336</td>\n",
       "      <td>0.932017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Otu0001</td>\n",
       "      <td>2485.555556</td>\n",
       "      <td>4115.779661</td>\n",
       "      <td>193.5</td>\n",
       "      <td>0.194697</td>\n",
       "      <td>0.932017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        OTU     LVR_mean     NVR_mean  statistic   p_value     p_adj\n",
       "7   Otu0008    95.888889    40.932203      400.0  0.012107  0.472181\n",
       "24  Otu0036     3.666667    29.661017      183.0  0.122226  0.932017\n",
       "3   Otu0004  1217.444444   599.203390      349.0  0.131345  0.932017\n",
       "37  Otu0072    10.333333     1.457627      326.0  0.184336  0.932017\n",
       "0   Otu0001  2485.555556  4115.779661      193.5  0.194697  0.932017"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts_stats_v9_rows = list()\n",
    "for otu, row in counts_v9.iterrows():\n",
    "    lvr_abunds = row[meta_v9.query('VR_group == \"LVR\"').index]\n",
    "    nvr_abunds = row[meta_v9.query('VR_group == \"NVR\"').index]\n",
    "    # check for not all zeros\n",
    "    # lvr_gt_20 = (lvr_abunds != 0).sum()/len(lvr_abunds) > .2\n",
    "    # nvr_gt_20 = (nvr_abunds != 0).sum()/len(nvr_abunds) > .2\n",
    "    lvr_gt_20 = lvr_abunds.sum()/len(lvr_abunds) > 10\n",
    "    nvr_gt_20 = nvr_abunds.sum()/len(nvr_abunds) > 10\n",
    "    if lvr_gt_20 or nvr_gt_20:\n",
    "        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds)\n",
    "        counts_stats_v9_rows.append([otu, lvr_abunds.mean(), nvr_abunds.mean(), stat, p_value])\n",
    "counts_stats_v9 = pd.DataFrame(counts_stats_v9_rows, columns=['OTU', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value']).sort_values('p_value')\n",
    "counts_stats_v9['p_adj'] = p_adjust(counts_stats_v9['p_value'])\n",
    "counts_stats_v9.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "512c00ba-d5ff-421b-a9f8-2779994ebc29",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
